do "...\First.do"

* In this file, I find mothers and fathers, merge them with information about
* education and age, and save the results in separate datasets.
* For now this is done for the full population; the merge with the PCP sample comes later.

*******************************************************************************
* Mothers
*******************************************************************************

* Build one row per mother (mor_id) holding her highest completed education
* level (mor_udd_level) and her year of birth (yob), and save it to $work.
clear
use "$raw\mom_udd.dta"

* hfaudd: coded highest level of completed education. It is converted to a
* string key so it can be merged with the format file that groups the codes
* on a higher level (udd_level).
format hfaudd %4.0f
drop if missing(hfaudd)
tostring hfaudd, replace

* On a string variable missing() is true for "", so this removes any empty
* codes left after the conversion (defensive; none are expected).
drop if missing(hfaudd)

* Left-pad the code with zeros to four characters. Every length below four is
* padded (previously only lengths 3 and 1 were, so a 2-character code stayed
* unpadded and could not match a four-character key).
* NOTE(review): assumes the keys in hfaudd_level.dta are four-character
* strings, as the original padding of 1- and 3-character codes implies - confirm.
g lenght=strlen(hfaudd)
tab lenght
replace hfaudd=substr("0000", 1, 4-lenght)+hfaudd if lenght<4


merge m:1 hfaudd using "...\formats\hfaudd_level.dta"
* Drop format rows that match no observation in the data. Unmatched
* observations from the data are kept, with udd_level missing.
drop if _merge==2
drop _merge

 
** Use the highest level of completed education over the year period
destring udd_level, replace
bys pnr: egen max=max(udd_level)
drop if udd_level!=max

* Collapse to one row per pnr
keep pnr max 
duplicates drop

rename max mor_udd_level


** Add age and year to dataset of mothers
merge 1:m pnr using "$raw\patient_background_moreyears.dta", keepus(alder year)
drop if _merge==2
drop _merge

* Year of birth implied by each (year, alder) record
g yob=year-alder
drop alder year

* Use the median over all records of a person as her single year of birth
* (presumably because year-alder can differ between records - TODO confirm)
bys pnr: egen median=median(yob)
drop yob
rename median yob

* Collapse back to one row per pnr and tabulate the row count per pnr as a check
duplicates drop
bys pnr: g N=_N
tab N 
drop N

rename pnr mor_id


save "$work\mom_udd.dta", replace



*******************************************************************************
* Fathers
*******************************************************************************

* Build one row per father (far_id) holding his highest completed education
* level (far_udd_level) and his year of birth (yob), and save it to $work.
clear
use "$raw\dad_udd.dta"

* hfaudd: coded highest level of completed education. It is converted to a
* string key so it can be merged with the format file that groups the codes
* on a higher level (udd_level).
format hfaudd %4.0f
drop if missing(hfaudd)
tostring hfaudd, replace

* On a string variable missing() is true for "", so this removes any empty
* codes left after the conversion (defensive; none are expected).
drop if missing(hfaudd)

* Left-pad the code with zeros to four characters. Every length below four is
* padded (previously only lengths 3 and 1 were, so a 2-character code stayed
* unpadded and could not match a four-character key).
* NOTE(review): assumes the keys in hfaudd_level.dta are four-character
* strings, as the original padding of 1- and 3-character codes implies - confirm.
g lenght=strlen(hfaudd)
tab lenght
replace hfaudd=substr("0000", 1, 4-lenght)+hfaudd if lenght<4


merge m:1 hfaudd using "...\formats\hfaudd_level.dta"
* Drop format rows that match no observation in the data. Unmatched
* observations from the data are kept, with udd_level missing.
drop if _merge==2
drop _merge


** Use the highest level of completed education over the year period
destring udd_level, replace
bys pnr: egen max=max(udd_level)
drop if udd_level!=max

* Collapse to one row per pnr
keep pnr max 
duplicates drop

rename max far_udd_level

** Add age and year to dataset of fathers
merge 1:m pnr using "$raw\patient_background_moreyears.dta", keepus(alder year)
drop if _merge==2
drop _merge

* Year of birth implied by each (year, alder) record
g yob=year-alder
drop alder year

* Use the median over all records of a person as his single year of birth
* (presumably because year-alder can differ between records - TODO confirm)
bys pnr: egen median=median(yob)
drop yob
rename median yob


* Collapse back to one row per pnr and tabulate the row count per pnr as a check
duplicates drop
bys pnr: g N=_N
tab N 
drop N

rename pnr far_id

save "$work\dad_udd.dta", replace


